"""
TF-IDF文本特征提取：返回的是权重矩阵，且是稀疏矩阵
"""
from sklearn.feature_extraction.text import TfidfVectorizer


# Corpus of documents to vectorize.
corpus = [
    'I love programming',
    'Programming is fun',
]

# Fit the vectorizer on the corpus and compute the TF-IDF matrix in one step.
vec = TfidfVectorizer()
X = vec.fit_transform(corpus)

# Materialize what will be displayed: the matrix as a dense array (via
# .toarray()) and the feature names reported by the fitted vectorizer.
dense_weights = X.toarray()
feature_names = vec.get_feature_names_out()

print("tf-idf矩阵：\n", dense_weights)  # print the TF-IDF matrix
print("特征名：\n", feature_names)  # print the feature names